Read Data

You must read the data before trying to run code on your own machine. To read data use the following code after setting your working directory. To set your working directory, modify the following to set the file path for the folder where the data file resides. setwd(’c:/thatawesomeclass/)

Here is AirBnb data on rentals in New York city for a couple of months. Since the data contains locations of the rentals, this is spatial data. The features of each rental are related to a single location, therefore this is called point data.

data = read.csv('analysisData_with_lonlat.csv')

Visualize airbnb data

What does the plot look like?

library(ggplot2)
ggplot(data) +
  geom_point(aes(x=longitude,y=latitude),size=0.1)

Mapping Data using Google Maps

Google API Data

Google has recently changed its API requirements, and ggmap users are now required to provide an API key and enable billing. ggmap itself is outdated on CRAN. Until the new version is available on CRAN, you can install an updated version by running the following code

# if(!requireNamespace("devtools")) install.packages("devtools")
# devtools::install_github("dkahle/ggmap", ref = "tidyup")

When you load ggmap, you can set your API key with register_google() (see ?register_google for details), but don’t forget to enable the Maps Static API in the Google Cloud interface and enable billing!

For you to use the google maps code below you will have to get your own api from Google. Once you have obtained an api key, you can assign it to the object api_key.

library(ggmap)
register_google(key = api_key)

Map of New York

Get map of New York City by setting location as center of the city.

library(ggmap)
newyork_map = get_map(location=c(-73.93,40.77), zoom=10, scale=4)
ggmap(newyork_map)

Add a data layer: Map of New York City + AirBnb Rentals

ggmap(newyork_map)+
  geom_point(data=data, aes(x=longitude,y=latitude), size=0.1)

Address overplotting by setting alpha=0.2, and change the color of rental location to seagreen

ggmap(newyork_map)+
  geom_point(data=data, aes(x=longitude,y=latitude), size=0.1, alpha=0.2, color='seagreen')

Map of New York city + AirBnb rentals + price

Represent price using Yellow-orange-red scale

library(RColorBrewer)
ggmap(newyork_map)+
  geom_point(data=data, aes(x=longitude,y=latitude,color=price), size=0.1, alpha=0.2)+
  scale_color_gradientn(colors=brewer.pal(n=7, name='YlOrRd'))

Satellite Map of Manhattan

manhattan = get_map(location=c(-73.93,40.77), zoom=12, scale=4, maptype='satellite')
ggmap(manhattan)

Manhattan (Satellite) + Rentals + Price

Price is being converted into five percentile bins to make it easier to spot location differences in price.

library(Hmisc)
ggmap(manhattan)+
  geom_point(data=data, aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
  scale_color_brewer(type ='seq', palette='RdYlGn', direction=-1)

Toner Map of Manhattan

Sometimes, all we want is a simple old fashioned map.

manhattan = get_map(location=c(-73.93,40.77), zoom=12, scale=4, maptype='toner')
ggmap(manhattan)

Manhattan (Toner) + Rentals + Price

ggmap(manhattan)+
  geom_point(data=data, aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
  scale_color_brewer(type='seq', palette='RdYlGn', direction =-1)

Terrain Map of Manhattan

manhattan = get_map(location=c(-73.93,40.77), zoom=12, scale=4, maptype='terrain', source='stamen')
ggmap(manhattan)

Manhattan (Terrain) + Rentals + Price

ggmap(manhattan)+
  geom_point(data=data,aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
  scale_color_brewer(type='seq', palette='RdYlGn', direction=-1)

Roadmap of Manhattan

manhattan = get_map(location = c(-73.93,40.77),zoom = 12,scale = 4,maptype = 'roadmap')
ggmap(manhattan)

Manhattan (Roadmap) + Rentals + Price

ggmap(manhattan)+
  geom_point(data=data,aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
  scale_color_brewer(type='seq', palette='RdYlGn', direction=-1)

Map of New York

Let’s get familar with ggmaps and google maps by plotting Cold Stone Creamery (73.98W, 40.75N).
You will note ggmap uses a layered approach to plotting like ggplot2. However, unlike ggplot2 where the data is typically in the ggplot() layer, when using ggmap() the data pushed down to the plotting layers (e.g., geom_point, geom_label)

coldStoneCreamery_street = get_map(location=c(-73.988640,40.757020), zoom=18 , scale=2)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=40.75702,-73.98864&zoom=18&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx-x-ND2iyfra0D14HsZjfL_TKqll4
ggmap(coldStoneCreamery_street)+
  geom_point(data=data.frame(lon=-73.988640,lat=40.757020),
             mapping=aes(lon,lat),size=4,color='steelblue')+
  geom_label(data=data.frame(lon=-73.988640,lat=40.757020),
             mapping=aes(lon,lat), label='Cold Stone Creamery', nudge_y=0.0002, size=2)

Mapping Data using library(tigris)

Above maps rely on Google API, which for now is free. An alternative is to use US Census Shapefiles from the TIGER/Line geodatabase. tigris is an R package that allows users to directly download and use TIGER/Line shapefiles. These files belong to the class sp.

library(tigris)
manhattan_tigris = tracts(state='NY', county='New York', cb=TRUE)

Map of Manhattan with library(tmap)

library(tmap) offers a flexible, layer-based, and easy to use approach to create thematic maps, such as choropleths and bubble maps. It only works with sp, sf or raster objects, but that is okay because most spatial files are spatial objects.

library(tmap)
tm_shape(manhattan_tigris)+
  tm_borders()

Convert DataFrame to sp

Of course, if the data is not a spatial object, it will have to be converted to one before it can be plotted using library(tmap). Furthermore, to overlay sp class data on a map, both must have the same projection.

library(sp)
data_sp = data
coordinates(data_sp) =~longitude+latitude

Map of Manhattan + Rentals

tm_shape(manhattan_tigris)+
  tm_borders()+
  tm_shape(data_sp)+
  tm_dots(col='steelblue', size=0.01)

Using Shape files

Read Shape File

library(rgdal) has a handy function to open shapefiles. In the code below, ‘nyc_shape’ is a folder on my computer that contains a set of nyc shape files that have been gathered from the open New York Neighborhood Tabulation Areas (NTA) database ( data pulled on Aug.5, 2019 )

library(sp)
library(rgdal)
nyc = readOGR(dsn='nyc_shape', layer='nynta')
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Kitty\Desktop\AAFM2 2022S\11_SpatialAnalysis\nyc_shape", layer: "nynta"
## with 195 features
## It has 7 fields

Examine the Spatial File

Object Type

class(nyc) # object type
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

Coordinate system

summary(nyc)  # coordinate system
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##        min       max
## x 913175.1 1067382.5
## y 120121.9  272844.3
## Is projected: TRUE 
## proj4string :
## [+proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333
## +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft
## +no_defs]
## Data attributes:
##     BoroCode   BoroName          CountyFIPS          NTACode         
##  Min.   :1   Length:195         Length:195         Length:195        
##  1st Qu.:2   Class :character   Class :character   Class :character  
##  Median :3   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :3                                                           
##  3rd Qu.:4                                                           
##  Max.   :5                                                           
##    NTAName            Shape_Leng       Shape_Area       
##  Length:195         Min.   : 11988   Min.   :  5581768  
##  Class :character   1st Qu.: 23921   1st Qu.: 19392232  
##  Mode  :character   Median : 30549   Median : 32629789  
##                     Mean   : 42006   Mean   : 43228063  
##                     3rd Qu.: 41877   3rd Qu.: 50223047  
##                     Max.   :490421   Max.   :327765030

Slots

str(nyc,max.level = 2) # slots in sp object
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  195 obs. of  7 variables:
##   ..@ polygons   :List of 195
##   ..@ plotOrder  : int [1:195] 113 182 181 133 174 185 140 98 143 137 ...
##   ..@ bbox       : num [1:2, 1:2] 913175 120122 1067383 272844
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
##   ..$ comment: chr "TRUE"

Projection Type

proj4string(nyc)
## [1] "+proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333 +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs"
nyc@proj4string
## CRS arguments:
##  +proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333
## +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft
## +no_defs

Bounding Box

nyc@bbox # bounding box
##        min       max
## x 913175.1 1067382.5
## y 120121.9  272844.3

Data Slot

head(nyc@data) # data slot

Subsetting a SpatialPolygonsDataFrame

str(nyc[1,], max.level=2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  1 obs. of  7 variables:
##   ..@ polygons   :List of 1
##   ..@ plotOrder  : int 1
##   ..@ bbox       : num [1:2, 1:2] 982208 162478 991773 174108
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot

Extract rows. Subset the first census tract in the dataset and plot it

plot(nyc[1,]) 

But, geographical entities only make sense in context. So, let us overlay census tract 1 on a plot of nyc.

plot(nyc,col='steelblue')
plot(nyc[1,], add=T, col='tomato2')

Next, extract census tract 2

plot(nyc,col='steelblue')
plot(nyc[1,], add=T, col='tomato2')
plot(nyc[2,], add=T, col='yellow')

Finally, extract censust tracts 20:60 and plot.

plot(nyc,col='steelblue')
plot(nyc[1,], add=T, col='tomato2')
plot(nyc[2,], add=T, col='yellow')
plot(nyc[20:60,], add=T, col='green')

Extract Columns

nyc@data$BoroName #or nyc$BoroName
##   [1] "Brooklyn"      "Queens"        "Queens"        "Queens"       
##   [5] "Manhattan"     "Queens"        "Queens"        "Brooklyn"     
##   [9] "Bronx"         "Queens"        "Manhattan"     "Queens"       
##  [13] "Queens"        "Queens"        "Brooklyn"      "Brooklyn"     
##  [17] "Brooklyn"      "Bronx"         "Brooklyn"      "Queens"       
##  [21] "Queens"        "Bronx"         "Queens"        "Queens"       
##  [25] "Brooklyn"      "Brooklyn"      "Brooklyn"      "Brooklyn"     
##  [29] "Brooklyn"      "Brooklyn"      "Brooklyn"      "Brooklyn"     
##  [33] "Brooklyn"      "Brooklyn"      "Bronx"         "Brooklyn"     
##  [37] "Brooklyn"      "Brooklyn"      "Brooklyn"      "Queens"       
##  [41] "Brooklyn"      "Bronx"         "Brooklyn"      "Manhattan"    
##  [45] "Queens"        "Queens"        "Bronx"         "Bronx"        
##  [49] "Bronx"         "Bronx"         "Brooklyn"      "Brooklyn"     
##  [53] "Queens"        "Bronx"         "Queens"        "Queens"       
##  [57] "Staten Island" "Manhattan"     "Queens"        "Queens"       
##  [61] "Queens"        "Queens"        "Queens"        "Queens"       
##  [65] "Queens"        "Bronx"         "Queens"        "Queens"       
##  [69] "Queens"        "Bronx"         "Brooklyn"      "Queens"       
##  [73] "Brooklyn"      "Brooklyn"      "Queens"        "Queens"       
##  [77] "Queens"        "Queens"        "Queens"        "Brooklyn"     
##  [81] "Queens"        "Bronx"         "Bronx"         "Staten Island"
##  [85] "Staten Island" "Bronx"         "Staten Island" "Brooklyn"     
##  [89] "Brooklyn"      "Staten Island" "Bronx"         "Queens"       
##  [93] "Queens"        "Queens"        "Manhattan"     "Manhattan"    
##  [97] "Queens"        "Staten Island" "Manhattan"     "Queens"       
## [101] "Manhattan"     "Manhattan"     "Manhattan"     "Brooklyn"     
## [105] "Queens"        "Queens"        "Staten Island" "Brooklyn"     
## [109] "Bronx"         "Manhattan"     "Brooklyn"      "Manhattan"    
## [113] "Staten Island" "Staten Island" "Staten Island" "Queens"       
## [117] "Bronx"         "Bronx"         "Brooklyn"      "Brooklyn"     
## [121] "Bronx"         "Brooklyn"      "Brooklyn"      "Brooklyn"     
## [125] "Brooklyn"      "Brooklyn"      "Queens"        "Queens"       
## [129] "Queens"        "Queens"        "Bronx"         "Queens"       
## [133] "Queens"        "Brooklyn"      "Brooklyn"      "Brooklyn"     
## [137] "Queens"        "Manhattan"     "Manhattan"     "Staten Island"
## [141] "Staten Island" "Manhattan"     "Brooklyn"      "Brooklyn"     
## [145] "Manhattan"     "Queens"        "Queens"        "Staten Island"
## [149] "Manhattan"     "Manhattan"     "Brooklyn"      "Staten Island"
## [153] "Bronx"         "Bronx"         "Manhattan"     "Bronx"        
## [157] "Bronx"         "Bronx"         "Manhattan"     "Manhattan"    
## [161] "Manhattan"     "Manhattan"     "Bronx"         "Manhattan"    
## [165] "Manhattan"     "Manhattan"     "Manhattan"     "Bronx"        
## [169] "Bronx"         "Bronx"         "Bronx"         "Bronx"        
## [173] "Bronx"         "Bronx"         "Bronx"         "Bronx"        
## [177] "Manhattan"     "Manhattan"     "Bronx"         "Bronx"        
## [181] "Brooklyn"      "Queens"        "Brooklyn"      "Brooklyn"     
## [185] "Staten Island" "Queens"        "Queens"        "Staten Island"
## [189] "Staten Island" "Queens"        "Queens"        "Staten Island"
## [193] "Staten Island" "Brooklyn"      "Brooklyn"
table(nyc$BoroName)
## 
##         Bronx      Brooklyn     Manhattan        Queens Staten Island 
##            38            51            29            58            19

Plot only Manhattan

plot(nyc[nyc$BoroName=='Manhattan',])

Maps with library(tmap)

Plot the five boroughs Let’s visualizing with library(tmap) Learn more about library(tmap) in this vignette

library(tmap)
tm_shape(nyc)+
  tm_borders()

tm_shape(nyc)+
  tm_polygons('BoroName')

tm_shape(nyc)+
  tm_polygons('BoroName')+
  tm_style('natural')+  # works like library(ggthemes)
  tm_compass()

nyc_map = 
  tm_shape(nyc)+
  tm_polygons('BoroName')+
  tm_style('natural')  # works like library(ggthemes)
tmap_save(nyc_map, filename='nyc_map.jpg')  # picture files
tmap_save(nyc_map, filename='nyc_map.html')  # Saving as html creates an interactive map  

Merge Data with a Map

Let us plot population data on to a map of New York. Population Data is available from Open New York. In this instance, I have downloaded data on to my local machine before reading it into R. This is a good practice as repeatedly downloading relatively static data (population data is not updated daily or even weekly) by reading from the website directly places an unnecessary burden on the server.
Here, we read the data and create a field for change in population.

pop = read.csv('nyc_pop.csv')
library(dplyr)
library(tidyr)
nyc_pop = 
  pop%>%
  spread(key=Year, value=Population)%>%
  rename(year_2000='2000', year_2010='2010')%>%
  mutate(pop_change=((year_2010-year_2000)/year_2000)*100)
head(nyc_pop)

Merge and Plot

library(rgeos)
nyc_spdf = merge(x=nyc, y=nyc_pop, by.x='NTACode', by.y='NTA.Code')
tm_shape(nyc_spdf)+
  tm_fill(col='pop_change', palette=brewer.pal(n=7,name='BrBG'), 
          breaks=c(-300,-200,-100,0,100,200,300))+
  tm_style('cobalt')


This file was generated using R Version 4.1.2